'''
	一、思路分析
			找到数据来源
				https://fanqienovel.com/reader/7253751198681924134?enter_from=reader
	二、代码流程
		1.发送请求
		2.提取数据
'''

import requests  # 发送请求
import parsel  # 解析数据
import os


# Character substitution table used by get_content().
# Keys are decimal code points written as strings (get_content looks up
# str(ord(ch))); values are the readable character to emit instead.
# All keys fall in 58344-58715 (U+E3E8-U+E55B), i.e. inside the Unicode
# Private Use Area.
# NOTE(review): presumably these are the glyph slots of the site's custom
# obfuscation font; the table is hand-maintained, so verify it against the
# current font before trusting the output.
dict_data = {
	'58670': '0',
	'58413': '1',
	'58678': '2',
	'58371': '3',
	'58353': '4',
	'58480': '5',
	'58359': '6',
	'58449': '7',
	'58540': '8',
	'58692': '9',
	'58712': 'a',
	'58542': 'b',
	'58575': 'c',
	'58626': 'd',
	'58691': 'e',
	'58561': 'f',
	'58362': 'g',
	'58619': 'h',
	'58430': 'i',
	'58531': 'j',
	'58588': 'k',
	'58440': 'l',
	'58681': 'm',
	'58631': 'n',
	'58376': 'o',
	'58429': 'p',
	'58555': 'q',
	'58498': 'r',
	'58518': 's',
	'58453': 't',
	'58397': 'u',
	'58356': 'v',
	'58435': 'w',
	'58514': 'x',
	'58482': 'y',
	'58529': 'z',
	'58515': 'A',
	'58688': 'B',
	'58709': 'C',
	'58344': 'D',
	'58656': 'E',
	'58381': 'F',
	'58576': 'G',
	'58516': 'H',
	'58463': 'I',
	'58649': 'J',
	'58571': 'K',
	'58558': 'L',
	'58433': 'M',
	'58517': 'N',
	'58387': 'O',
	'58687': 'P',
	'58537': 'Q',
	'58541': 'R',
	'58458': 'S',
	'58390': 'T',
	'58466': 'U',
	'58386': 'V',
	'58697': 'W',
	'58519': 'X',
	'58511': 'Y',
	'58634': 'Z',
	'58611': '的',
	'58590': '一',
	'58398': '是',
	'58422': '了',
	'58657': '我',
	'58666': '不',
	'58562': '人',
	'58345': '在',
	'58510': '他',
	'58496': '有',
	'58654': '这',
	'58441': '个',
	'58493': '上',
	'58714': '们',
	'58618': '来',
	'58528': '到',
	'58620': '时',
	'58403': '大',
	'58461': '地',
	'58481': '为',
	'58700': '子',
	'58708': '中',
	'58503': '你',
	'58442': '说',
	'58639': '生',
	'58506': '国',
	'58663': '年',
	'58436': '着',
	'58563': '就',
	'58391': '那',
	'58357': '和',
	'58354': '要',
	'58695': '她',
	'58372': '出',
	'58696': '也',
	'58551': '得',
	'58445': '里',
	'58408': '后',
	'58599': '自',
	'58424': '以',
	'58394': '会',
	'58348': '家',
	'58426': '可',
	'58673': '下',
	'58417': '而',
	'58556': '过',
	'58603': '天',
	'58565': '去',
	'58604': '能',
	'58522': '对',
	'58632': '小',
	'58622': '多',
	'58350': '然',
	'58605': '于',
	'58617': '心',
	'58401': '学',
	'58637': '么',
	'58684': '之',
	'58382': '都',
	'58464': '好',
	'58487': '看',
	'58693': '起',
	'58608': '发',
	'58392': '当',
	'58474': '没',
	'58601': '成',
	'58355': '只',
	'58573': '如',
	'58499': '事',
	'58469': '把',
	'58361': '还',
	'58698': '用',
	'58489': '第',
	'58711': '样',
	'58457': '道',
	'58635': '想',
	'58492': '作',
	'58647': '种',
	'58623': '开',
	'58521': '美',
	'58609': '总',
	'58530': '从',
	'58665': '无',
	'58652': '情',
	'58676': '己',
	'58456': '面',
	'58581': '最',
	'58509': '女',
	'58488': '但',
	'58363': '现',
	'58685': '前',
	'58396': '些',
	'58523': '所',
	'58471': '同',
	'58485': '日',
	'58613': '手',
	'58533': '又',
	'58589': '行',
	'58527': '意',
	'58593': '动',
	'58699': '方',
	'58707': '期',
	'58414': '它',
	'58596': '头',
	'58570': '经',
	'58660': '长',
	'58364': '儿',
	'58526': '回',
	'58501': '位',
	'58638': '分',
	'58404': '爱',
	'58677': '老',
	'58535': '因',
	'58629': '很',
	'58577': '给',
	'58606': '名',
	'58497': '法',
	'58662': '间',
	'58479': '斯',
	'58532': '知',
	'58380': '世',
	'58385': '什',
	'58405': '两',
	'58644': '次',
	'58578': '使',
	'58505': '身',
	'58564': '者',
	'58412': '被',
	'58686': '高',
	'58624': '已',
	'58667': '亲',
	'58607': '其',
	'58616': '进',
	'58368': '此',
	'58427': '话',
	'58423': '常',
	'58633': '与',
	'58525': '活',
	'58543': '正',
	'58418': '感',
	'58597': '见',
	'58683': '明',
	'58507': '问',
	'58621': '力',
	'58703': '理',
	'58438': '尔',
	'58536': '点',
	'58384': '文',
	'58484': '几',
	'58539': '定',
	'58554': '本',
	'58421': '公',
	'58347': '特',
	'58569': '做',
	'58710': '外',
	'58574': '孩',
	'58375': '相',
	'58645': '西',
	'58592': '果',
	'58572': '走',
	'58388': '将',
	'58370': '月',
	'58399': '十',
	'58651': '实',
	'58546': '向',
	'58504': '声',
	'58419': '车',
	'58407': '全',
	'58672': '信',
	'58675': '重',
	'58538': '三',
	'58465': '机',
	'58374': '工',
	'58579': '物',
	'58402': '气',
	'58702': '每',
	'58553': '并',
	'58360': '别',
	'58389': '真',
	'58560': '打',
	'58690': '太',
	'58473': '新',
	'58512': '比',
	'58653': '才',
	'58704': '便',
	'58545': '夫',
	'58641': '再',
	'58475': '书',
	'58583': '部',
	'58472': '水',
	'58478': '像',
	'58664': '眼',
	'58586': '等',
	'58568': '体',
	'58674': '却',
	'58490': '加',
	'58476': '电',
	'58346': '主',
	'58630': '界',
	'58595': '门',
	'58502': '利',
	'58713': '海',
	'58587': '受',
	'58548': '听',
	'58351': '表',
	'58547': '德',
	'58443': '少',
	'58460': '克',
	'58636': '代',
	'58585': '员',
	'58625': '许',
	'58694': '稜',
	'58428': '先',
	'58640': '口',
	'58628': '由',
	'58612': '死',
	'58446': '安',
	'58468': '写',
	'58410': '性',
	'58508': '马',
	'58594': '光',
	'58483': '白',
	'58544': '或',
	'58495': '住',
	'58450': '难',
	'58643': '望',
	'58486': '教',
	'58406': '命',
	'58447': '花',
	'58669': '结',
	'58415': '乐',
	'58444': '色',
	'58549': '更',
	'58494': '拉',
	'58409': '东',
	'58658': '神',
	'58557': '记',
	'58602': '处',
	'58559': '让',
	'58610': '母',
	'58513': '父',
	'58500': '应',
	'58378': '直',
	'58680': '字',
	'58352': '场',
	'58383': '平',
	'58454': '报',
	'58671': '友',
	'58668': '关',
	'58452': '放',
	'58627': '至',
	'58400': '张',
	'58455': '认',
	'58416': '接',
	'58552': '告',
	'58614': '入',
	'58582': '笑',
	'58534': '内',
	'58701': '英',
	'58349': '军',
	'58491': '候',
	'58467': '民',
	'58365': '岁',
	'58598': '往',
	'58425': '何',
	'58462': '度',
	'58420': '山',
	'58661': '觉',
	'58615': '路',
	'58648': '带',
	'58470': '万',
	'58377': '男',
	'58520': '边',
	'58646': '风',
	'58600': '解',
	'58431': '叫',
	'58715': '仁',
	'58524': '金',
	'58439': '快',
	'58566': '原',
	'58477': '吃',
	'58642': '妈',
	'58437': '变',
	'58411': '通',
	'58451': '师',
	'58395': '立',
	'58369': '象',
	'58706': '数',
	'58705': '四',
	'58379': '失',
	'58567': '满',
	'58373': '战',
	'58448': '远',
	'58659': '格',
	'58434': '士',
	'58679': '音',
	'58432': '轻',
	'58689': '目',
	'58591': '条',
	'58682': '呢',
}


def get_text(url, cookies=None, timeout=10):
	"""Download a reader page and return the raw text of the chapter body.

	Args:
		url: Address of the reader page. Surrounding whitespace (for example
			the newline left on a line read from a file) is stripped before
			the request is sent.
		cookies: Optional raw ``Cookie`` header value; sent only when truthy.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout ``requests`` can block indefinitely on a stalled
			connection.

	Returns:
		The XPath string value of the first
		``<div class="muye-reader-content noselect">`` element (an empty
		string when no such element is present). The text is returned as
		served, i.e. still containing the substituted glyph characters.

	Raises:
		requests.exceptions.RequestException: On connection errors or when
			the timeout expires. The HTTP status code is not checked.
	"""
	headers = {
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
	}

	# Attach the caller's cookie string, if any
	if cookies:
		headers['Cookie'] = cookies

	response = requests.get(url=url.strip(), headers=headers, timeout=timeout)

	# Extract the chapter body from the page markup
	select = parsel.Selector(response.text)
	return select.xpath('string(//div[@class="muye-reader-content noselect"])').get()
	

def get_content(text, mapping=None):
	"""Replace substituted glyph characters with their readable equivalents.

	Each character is looked up by its decimal code point (as a string) in
	the mapping; characters without an entry are kept unchanged.

	Args:
		text: Raw chapter text. ``None`` or an empty string yields ``""``.
		mapping: Optional ``{str(code_point): replacement}`` table. Defaults
			to the module-level ``dict_data``.

	Returns:
		The decoded text as a single string.
	"""
	if not text:
		return ""
	if mapping is None:
		# Resolved at call time so the module-level table can be swapped out
		mapping = dict_data
	# dict.get replaces the former bare ``except:``, which hid every error
	# (not just missing keys); join avoids repeated string concatenation.
	return "".join(mapping.get(str(ord(ch)), ch) for ch in text)


def transform_content(content):
	"""Insert a blank line after each sentence end or closing quote.

	Rules, applied in one left-to-right pass:
	  * A full stop '。' is followed by a paragraph break; if a closing
	    quote '”' comes right after it, the quote is kept attached to the
	    sentence and the break goes after the quote.
	  * A closing quote '”' on its own is followed by a paragraph break; an
	    immediately repeated closing quote is dropped.
	  * Every other character is copied through unchanged.

	Returns the reformatted string.
	"""
	period = '。'
	closing_quote = '”'
	paragraph_gap = '\n\n'

	pieces = []
	total = len(content)
	pos = 0
	while pos < total:
		current = content[pos]
		pieces.append(current)

		# One-character look-ahead; None when we are on the last character
		following = content[pos + 1] if pos + 1 < total else None

		if current == period:
			if following == closing_quote:
				# Keep the quote with its sentence and consume it
				pieces.append(following)
				pos += 1
			pieces.append(paragraph_gap)
		elif current == closing_quote:
			if following == closing_quote:
				# Skip the duplicate quote without emitting it
				pos += 1
			pieces.append(paragraph_gap)

		pos += 1

	return "".join(pieces)


def write_in_file(new_content, output_dir="./output/"):
	"""Write text to the next sequentially numbered file in the output directory.

	Files are named ``000NNNN.txt``. The next number is one more than the
	highest number found among existing files that match that pattern, or 1
	when there are none.

	Args:
		new_content: Text to write (encoded as UTF-8).
		output_dir: Directory to write into. Created if it does not exist.

	Returns:
		The path of the file that was written.
	"""
	# os.listdir() raises FileNotFoundError on a missing directory, so make
	# sure it exists before scanning it.
	os.makedirs(output_dir, exist_ok=True)

	# Collect the numeric part of every already-numbered file
	existing_numbers = []
	for file in os.listdir(output_dir):
		if file.startswith("000") and file.endswith(".txt"):
			try:
				existing_numbers.append(int(file[3:-4]))
			except ValueError:
				# Matches the pattern but the middle is not a number; ignore it
				continue

	# Build the next file name
	new_file_number = max(existing_numbers, default=0) + 1
	new_file_name = os.path.join(output_dir, f"000{new_file_number:04d}.txt")

	# Write the content
	with open(new_file_name, "w", encoding="utf-8") as f:
		f.write(new_content)

	print(f"内容已写入 {new_file_name}")
	return new_file_name


def get_one_file(url, cookie):
	"""Fetch one page, decode and reformat its text, and save it to a new file.

	Args:
		url: Address of the page to download.
		cookie: Cookie header value forwarded to get_text().
	"""
	raw_text = get_text(url, cookie)
	decoded = get_content(raw_text)
	write_in_file(transform_content(decoded))


# Cookie header sent with every request.
# NOTE(review): this is a session cookie committed in source; anyone with the
# file can reuse it. Consider rotating it and loading it from outside the
# repository.
my_cookies = "s_v_web_id=verify_m0njy975_xmNyW7AL_NHsX_4I5O_9RoD_1yC2g0HOXoFh; novel_web_id=7410690429950232105; serial_uuid=7410690429950232105; serial_webid=7410690429950232105; passport_csrf_token=c9debaaab7006a84a0b032c45597d11b; passport_csrf_token_default=c9debaaab7006a84a0b032c45597d11b; d_ticket=1fd59750b345db68284576647c473670853e1; n_mh=xCnAOhchP0cPUVwpHRSBuInuKVDyQ6KlIcKxMhlBgkI; passport_auth_status=8a96d2f8896232d7c933cc45fccacaee%2C; passport_auth_status_ss=8a96d2f8896232d7c933cc45fccacaee%2C; is_staff_user=false; store-region=cn-hb; store-region-src=uid; Hm_lvt_2667d29c8e792e6fa9182c20a3013175=1725456378,1725510890,1725541326,1725602974; HMACCOUNT=7DDA343F2DF28C97; csrf_session_id=7fdf7ec2639449a47a2a86acac81c5c5; passport_mfa_token=CjfkCbXpKiFVXeRue3R5bR%2FDujfPsWzuawrCISwvSdIiAU8nm%2BDJf5xNnaB%2BxYbIUsZivrUTb0TOGkoKPFD902LhVNc%2FffjS8o1bDfUQxkSDDlGIBBvhD7neUFyZ%2FuRFPjyT644TgaTbTZeKa7Y5ap6a0k6xNqSIjRDcr9sNGPax0WwgAiIBA1V4jwY%3D; odin_tt=2160f51df5648cb31aa63904bc369fac385b2ddda7d9278ee22fcd4581a044e8b1a1c57971271c00f822feb895788d34d5cb99263160bf2b01b33e26a92a6bdb; sid_guard=defbc6a2d4b435bea2ecd060ac46759e%7C1725604472%7C5184000%7CTue%2C+05-Nov-2024+06%3A34%3A32+GMT; uid_tt=ee6a5a4ec47e49ce68418e01db9d3086; uid_tt_ss=ee6a5a4ec47e49ce68418e01db9d3086; sid_tt=defbc6a2d4b435bea2ecd060ac46759e; sessionid=defbc6a2d4b435bea2ecd060ac46759e; sessionid_ss=defbc6a2d4b435bea2ecd060ac46759e; sid_ucp_v1=1.0.0-KDk0ZTZmMjRlMGNmYmE3ODJiMjdmODk4MzM2ZjBmYTgxMjFkZDU0MGYKHwitqrDhhI3rBBD4xOq2BhjHEyAMMOSEk5wGOAJA7wcaAmhsIiBkZWZiYzZhMmQ0YjQzNWJlYTJlY2QwNjBhYzQ2NzU5ZQ; ssid_ucp_v1=1.0.0-KDk0ZTZmMjRlMGNmYmE3ODJiMjdmODk4MzM2ZjBmYTgxMjFkZDU0MGYKHwitqrDhhI3rBBD4xOq2BhjHEyAMMOSEk5wGOAJA7wcaAmhsIiBkZWZiYzZhMmQ0YjQzNWJlYTJlY2QwNjBhYzQ2NzU5ZQ; Hm_lpvt_2667d29c8e792e6fa9182c20a3013175=1725607916; ttwid=1%7Ct8EnMD4NaFxImPyVwxBMcrFkh3YtVQnVCr80fEZ6lZY%7C1725607916%7C9b8b12bd224b6f89f00994758d3bc95af6f34114a08aa2a38badbf1be51fb383"

# Process every link listed in the input file, one URL per line.
with open("网页所有链接.txt", "r", encoding="utf-8") as f:
	for line in f:
		# Lines read from the file keep their trailing newline; strip it so
		# it is not sent as part of the URL, and skip blank lines instead of
		# requesting an empty address.
		url = line.strip()
		if not url:
			continue
		get_one_file(url, my_cookies)
		